logo

1 Load packages to extend base R

# Define packages that will be used to extend base R
package_names <- c("cansim", "DT", "ggplot2", "plyr", "plotly", "scales", "tidyverse", "xlsx") 

# Install any packages that do not exist
install_packages <- lapply(package_names, FUN = function(x) if(! require(x, character.only = TRUE)) install.packages(x))

# Load the packages
load_packages <- lapply(package_names, require, character.only = TRUE)

2 Import CANSIM table

d <- get_cansim("13-10-0766-01")

3 Data wrangling

3.1 Reshape data from long to wide format

d_wide <- spread(d %>% select("Case identifier number", "Case information", VALUE, REF_DATE), "Case information", VALUE)

3.2 Add leading zeros to case identifier number

d_wide$`Case identifier number` <- str_pad(d_wide$`Case identifier number`, width = nchar(max(as.numeric(d$`Case identifier number`))), pad = "0")

3.3 Restructure select vectors as factors

# Identify vectors
vectors_to_factor <- c("Age group", "Gender", "Transmission", "Hospitalization", "Intensive care unit", "Death")

# Restructure as factors
d_wide[vectors_to_factor] <- lapply(d_wide[vectors_to_factor], factor)

# Add semantic labels
d_wide$`Age group` <- revalue(d_wide$`Age group`, c("1" = "0-19", "2" = "20-29", "3" = "30-39", "4" = "40-49", "5" = "50-59", "6" = "60-69", "7" = "70-79", "8" = "80+", "99" = "Not stated"))
d_wide$Gender <- revalue(d_wide$Gender, c("1" = "Male", "2" = "Female", "7" = "Non-binary", "9" = "Not stated"))
d_wide$Transmission <- revalue(d_wide$Transmission, c("1" = "Travel exposure", "2" = "Community exposure", "3" = "Pending"))
d_wide$Hospitalization <- revalue(d_wide$Hospitalization, c("1" = "Yes", "2" = "No", "9" = "Not stated"))
d_wide$`Intensive care unit` <- revalue(d_wide$`Intensive care unit`, c("1" = "Yes", "2" = "No", "9" = "Not stated"))
d_wide$Death <- revalue(d_wide$Death, c("1" = "Yes", "2" = "No", "9" = "Not stated"))

3.4 Create episode date vector

# Add day, month and reference year vectors together and structure as a date object
d_wide$`Episode date` <- as.Date(paste0(d_wide$REF_DATE, "-", str_pad(d_wide$`Episode date - month`, 2, pad = "0"), "-", str_pad(d_wide$`Episode date - day`, 2, pad = "0")), format = "%Y-%m-%d")

# Change format to %d-%b-%y
d_wide$`Episode date` <- format(d_wide$`Episode date`, format = "%d-%b-%y")

3.5 Remove unwanted vectors from data

d_wide <- d_wide %>% select("Case identifier number", "Episode date", Gender, "Age group", Transmission, Hospitalization, "Intensive care unit", Death)

3.6 Rename vectors

names(d_wide) <- c("CaseID", "Episode Date", "Gender", "Age Group", "Exposure Setting", "Hospitalized", "Intensive Care Unit", "Death")

3.7 Order data by case ids in ascending order

d_wide <- d_wide %>% arrange(CaseID)

3.8 Export data to Excel

write.xlsx2(as.data.frame(d_wide), paste0("c:/users/joel/google drive/github/covid19/Table 13-10-0766-01 - updated ", format(Sys.time(), "%Y-%m-%d"), ".xlsx"), row.names = FALSE, showNA = FALSE)

4 Sortable/searchable raw data table

# Output data to JavaScript datatable
datatable(d_wide, 
  extensions = c("Buttons", "Scroller"), 
  options = list(
    pageLength = 25, 
    dom = "Bfrtip", 
    buttons = c("colvis", "copy", "csv", "excel", "pdf"), 
    deferRender = TRUE, 
    searchDelay = 500,
    initComplete = JS(
      "function(settings, json) {",
      "$(this.api().table().header()).css({'background-color': '#fff', 'color': '#111'});",
      "}"
    ),
    columnDefs = list(
      list(visible = FALSE, targets = c())
    )
  ), 
  rownames = FALSE,
  escape = FALSE,
  editable = TRUE
)

5 Incidence

5.1 Sortable/searchable data table by age group

# Convert episode date to date object
d_wide$`Episode Date` <- as.Date(d_wide$`Episode Date`, format = "%d-%b-%y")

# Remove cases with no episode date or with an age group value of "Not stated
d_wide <- d_wide %>% filter(! is.na(`Episode Date`) & `Age Group` != "Not stated")

# Drop "Not stated" level from the age group factor
d_wide$`Age Group` <- droplevels(d_wide$`Age Group`, "Not stated")

# Sort data by episode date
d_wide <- d_wide[order(d_wide$`Episode Date`),]

# Collapse several age group levels
#d_wide$`Age Group` <- fct_collapse(d_wide$`Age Group`, "20-59" = c("20-29", "30-39", "40-49", "50-59"))

# Create a crosstab
crosstab <- d_wide %>% group_by(`Age Group`, `Episode Date`) %>% tally()

# Rename the n vector
names(crosstab)[ncol(crosstab)] <- "Incidence"

# Compute cumulative incidence
crosstab <- crosstab %>% group_by(`Age Group`) %>% mutate(`Cumulative Incidence` = cumsum(Incidence))

# Output data to JavaScript datatable
datatable(crosstab, 
  extensions = c("Buttons", "Scroller"), 
  options = list(
    pageLength = 25, 
    dom = "Bfrtip", 
    buttons = c("colvis", "copy", "csv", "excel", "pdf"), 
    deferRender = TRUE, 
    searchDelay = 500,
    initComplete = JS(
      "function(settings, json) {",
      "$(this.api().table().header()).css({'background-color': '#fff', 'color': '#111'});",
      "}"
    ),
    columnDefs = list(
      list(visible = FALSE, targets = c())
    )
  ), 
  rownames = FALSE,
  escape = FALSE,
  editable = TRUE
)

5.2 Incidence plots by age group

# Print line plot
point_size <- 0.5
element_text_size <- 12
plot_width <- 900
plot_height <- 614
ggplotly(ggplot(crosstab, aes(x = `Episode Date`, y = Incidence)) +
    geom_line(aes(color = `Age Group`), size = point_size) +
    ggtitle("Incidence by age group") +
    xlab("Date") +
    ylab("Incidence") +
    theme_minimal() +
    theme(
       plot.title = element_text(size = element_text_size),
       axis.title.x = element_text(size = element_text_size),
       axis.title.y = element_text(size = element_text_size),
       legend.text = element_text(size = element_text_size),
       legend.title = element_blank()
  ), width = plot_width, height = plot_height)

5.3 Cumulative incidence plots by age group

# Print line plot
ggplotly(ggplot(crosstab, aes(x = `Episode Date`, y = `Cumulative Incidence`)) +
    geom_line(aes(color = `Age Group`), size = point_size) +
    ggtitle("Cumulative incidence by age group") +
    xlab("Date") +
    ylab("Cumulative incidence") +
    theme_minimal() +
    theme(
       plot.title = element_text(size = element_text_size),
       axis.title.x = element_text(size = element_text_size),
       axis.title.y = element_text(size = element_text_size),
       legend.text = element_text(size = element_text_size),
       legend.title = element_blank()
  ), width = plot_width, height = plot_height)